********************************************************************************
*PROJECT: Legislature Integration and Bipartisanship: A Natural Experiment in Iceland							   
*PURPOSE: Prep dyadic-level co-sponsorship outcomes, inc. month-level											   
********************************************************************************

// ---------------------------------------------------------------------
// Session setup
// ---------------------------------------------------------------------
// Start from an empty session, switch off output paging, allow -pause-,
// and raise the variable limit (the randomization-inference variables
// created further down are numerous).
clear all
set more off
pause on
set maxvar 120000

// Folder locations, all built from the global macro path_pch, which has to
// be defined by the caller before this file is run.
local mpfolder "$path_pch\Data\raw\MP"
local cosponfolder "$path_pch\Data\raw\Co-Sponsorship"
local seatingfolder "$path_pch\Data\intermediate\Seating"
local cosponfolderinter "$path_pch\Data\intermediate\Co-Sponsorship"
local cosponfolderanaly "$path_pch\Data\analysis\Co-Sponsorship"

// Highest index of the RI-suffixed seating variables looped over below (0..runsRI)
local runsRI 1000

// Scratch files reused inside the dyad-building loop
tempfile main join cosponData


*********************************************************************
// Save bill "busy-ness"
*********************************************************************

// Build a month-level table of how many bills were moved in each calendar
// month (pooled over sessions), split into bills with exactly one name
// attached (NBills1) and bills with more than one (NBills2), and flag the
// busiest months.
import delimited using "`cosponfolder'\\cosponsorship.csv", clear enc("utf-8") varn(1)

// characters 4-5 of billdate are read as the month number
g month=substr(billdate,4,2)
destring month, replace
ren sessionnum session_id
drop if session_id==114 | session_id==149 // no seating for 114, almost totally missing for 149
keep billnum session_id month

// collapse to one row per bill, remembering whether it had 1 or >1 names on it
bys session_id billnum: g num_sponsor = _N
duplicates drop
replace num_sponsor=2 if num_sponsor>1

// count bills in each month x sponsor-type cell, one row per cell
bys month num_sponsor: g NBills = _N
keep month num_sponsor NBills
duplicates drop

// spread the (at most two) cells of each month into NBills1 / NBills2.
// Each month has at most one non-missing value per type, so max() simply
// copies that value onto every row of the month.
forv i=1(1)2 {
	g NBills`i'_tmp = NBills if num_sponsor==`i'
	bys month: egen NBills`i' = max(NBills`i'_tmp)
	drop NBills`i'_tmp
}
keep month NBills?
duplicates drop

// A month with no bills of a given type has a count of zero, not an unknown
// count. Without this, the total below would be missing for such a month and,
// because missing sorts last, the month would be ranked as the busiest.
replace NBills1 = 0 if mi(NBills1)
replace NBills2 = 0 if mi(NBills2)

la var NBills1 "number of bills ever moved by 1 sponsor during this month"
la var NBills2 "number of bills ever moved by >1 sponsor during this month"

g NBills = NBills1+NBills2
la var NBills "number of bills ever moved during this month"

// rank months by single-sponsor bill count (ties keep month order) and flag
// ranks 10 and above as busy.
// NOTE(review): the cut-off of 10 presumably assumes all 12 calendar months
// are present (flagging the top three), and the ranking uses NBills1 rather
// than the total NBills - confirm both are intended.
sort NBills1, stable
g order_tmp = _n
g busy = order_tmp>=10
drop order_tmp

save "`cosponfolderinter'/busy_months.dta", replace

tempfile busyness
save `busyness'


*********************************************************************
// Prep Cospon Data (dyads; including lead and lag)
*********************************************************************

// For each of the three scraped sources (bills, resolutions, reports) and for
// each of 15 subsets (all items, first half, second half, calendar months
// 1-12), build a session x MP-pair dataset of co-sponsorship outcomes with
// one-session lags and leads, and store it in a tempfile named
// dyads_with_<short><suffix>. A local <short><suffix>_miss records whether
// the subset had no multi-sponsor items (1) or was built (0).
local short cospon res rep

local i=0

foreach x in cosponsorship resolutions reports {

local i=`i'+1
local shortname: word `i' of `short'

// Bring Scraped data (source for bills: http://www.althingi.is//thingstorf/thingmalalistar-eftir-thingum/lagafrumvorp/?lthing=115)
import delimited using "`cosponfolder'\\`x'.csv", clear enc("utf-8") varn(1)

// renaming if necessary
ren sessionnum session_id
ren mp_id MP_id

// check unique identifier
isid session_id billnum MP_id

// keep relevant obs.
drop if session_id==114 // no seating
bys session_id billnum: g num_sponsor = _N
order session_id billnum num_sponsor

// sponsor/mover dummy
g sponsor = sponsororder==1
drop sponsororder

// break up date variable (characters 1-2 = day, 4-5 = month, 7-10 = year)
g day=substr(billdate,1,2)
destring day, replace
g month=substr(billdate,4,2)
destring month, replace
g year=substr(billdate,7,4)
destring year, replace

// create first half dummy variable (at bill-session-level):
// order each session's items by date (then number) using the sponsor row,
// and flag those at or below half of the session's item count
preserve
	keep if sponsor==1
	keep session_id year month day billnum
	sort session_id year month day billnum
	by session_id: g billorder = _n
	by session_id: egen maxbillorder = max(billorder)
	g medianbillorder = maxbillorder/2
	g firsthalf = billorder<=medianbillorder
	keep session_id billnum firsthalf
	tempfile firsthalf
	save `firsthalf', replace
restore

// merge on first half variable (every item must have a sponsor row)
merge m:1 session_id billnum using `firsthalf'
assert _merge==3
drop _merge
tempfile inter
save `inter', replace


*********************************************************************
// Loop over all, first-half, and second-half bills
*********************************************************************

forv h=1/15 {
	
	use `inter', clear

	// pick the subset and the suffix used in file and macro names
	if `h'==1 { // all
		drop firsthalf
		local suf=""
	}
	else if `h'==2 { // first half
		keep if firsthalf==1
		drop firsthalf
		local suf="_1st"
	}
	else if `h'==3 { // second half
		keep if firsthalf==0
		drop firsthalf
		local suf="_2nd"
	}
	else if `h'>=4 { // month-by-month (firsthalf stays in the saved MP-bill file)
		local monthno = `h'-3
		keep if month==`monthno'
		local suf="_m`monthno'"
	}

	// calculate number of sponsors and co-sponsors per MP-session(-month)
	bys session_id MP_id sponsor: g tmp = _N
	g spontmp = tmp if sponsor==1
	g cosptmp = tmp if sponsor==0
	bys session_id MP_id: egen num_bill_spon = max(spontmp)
	bys session_id MP_id: egen num_bill_cospon = max(cosptmp)
	replace num_bill_spon=0 if num_bill_spon==.
	replace num_bill_cospon=0 if num_bill_cospon==.
	g num_bill_coany = num_bill_spon + num_bill_cospon
	assert num_bill_coany!=.
	drop spontmp cosptmp tmp

	// save MP-bill-level dataset
	save "`cosponfolderinter'/`shortname'_billMPlevel`suf'.dta", replace

	// drop bills with no co-sponsors (since no info on links)
	keep if num_sponsor != 1
	
	// only proceed if positive number of observations
	if _N==0 {
		local `shortname'`suf'_miss = 1
	}
	else if _N>0 {
		local `shortname'`suf'_miss = 0	
		keep session_id billnum MP_id num_bill_spon num_bill_cospon num_bill_coany sponsor
		save `main', replace

		// create dyads: pair every MP on an item with every other MP on the same item
		preserve
			ren MP_id MP_id_d
			ren num_bill_spon num_bill_spon_d
			ren num_bill_cospon num_bill_cospon_d
			ren num_bill_coany num_bill_coany_d
			ren sponsor sponsor_d
			save `join', replace
		restore
		joinby session_id billnum using `join'
		keep if MP_id < MP_id_d // only need 1 obs per pair of MPs 

		// productivity difference measures - sponsorship and co-sponsorship
		g prod_diff_spon = abs(num_bill_spon - num_bill_spon_d) 
		g prod_diff_cospon = abs(num_bill_cospon - num_bill_cospon_d) 
		g prod_diff_coany = abs(num_bill_coany - num_bill_coany_d)

		// cosine similarity-style denominator
		g denom_cospon = sqrt(num_bill_cospon)*sqrt(num_bill_cospon_d)
		g denom_coany = sqrt(num_bill_coany)*sqrt(num_bill_coany_d)

		// total productivity measure - sponsorship and co-sponsorship 
		g pair_prod_spon = num_bill_spon + num_bill_spon_d 
		g pair_prod_cospon = num_bill_cospon + num_bill_cospon_d 

		// generate link/cooperation outcomes
		g link_cospon=sponsor==sponsor_d // both are co-sponsors of the same bill
		g link_coop=sponsor!=sponsor_d // one is sponsor, one is co-sponsor

		bys session_id MP_id MP_id_d: egen cospon = sum(link_cospon)
		bys session_id MP_id MP_id_d: egen coop = sum(link_coop)
		g coany = cospon + coop

		keep session_id MP_id MP_id_d cospon coop coany prod_diff_* denom_* pair_prod_* num_bill_cospon* num_bill_coany*

		// make (month-)session-pair level dataset
		duplicates drop

		// make binary link outcomes
		g cosponBin = cospon>0 // =1 if at least one co-sponsorship link that session(-month)
		g coopBin = coop>0	
		g coanyBin = coany>0

		// temp save
		save `cosponData', replace

		// Lag / Lead copies: shift session_id by +1 / -1 so that the values of
		// session s line up with session s+1 (Lag) or s-1 (Lead) when merged.
		// Every non-key variable gets the Lag/Lead suffix. Renaming them
		// generically (rather than one by one) also covers prod_diff_coany,
		// which would otherwise stay unsuffixed and never reach the output as
		// a lag or lead.
		forv k = 1/2 {
			local l: word `k' of "1" "-1"
			local t: word `k' of "Lag" "Lead"
			use `cosponData', clear
			replace session_id = session_id + `l'
			ds session_id MP_id MP_id_d, not
			foreach v of varlist `r(varlist)' {
				ren `v' `v'`t'
			}
			tempfile cosponData`t'
			save `cosponData`t'', replace
		}


*********************************************************************
// Bring MP dyads, match to the cospon data
*********************************************************************

		// bring MP data
		use "`seatingfolder'\seating_MP.dta", clear

		// just keep basic vars
		keep session_id MP_id
			
		// create dyads: all pairs of MPs within a session, one row per pair
		preserve
			ren MP_id MP_id_d
			tempfile holding
			save `holding'
		restore
		joinby session_id using `holding'
		keep if MP_id < MP_id_d

		// merge cospon data (current, lagged, lead); pairs that only appear in
		// the cospon data are not kept
		forv k = 1/3 {
			local t: word `k' of "" "Lag" "Lead"
			merge 1:1 session_id MP_id MP_id_d using `cosponData`t'', keep(1 3) nogen
			// ~13% co-spons links don't merge (_m=2), possibly because MP changed halfway through session. may want to dig deeper (but unlikely to be problem)
			// DJ: these are likely to be dyads with replacements (vara-althing something), who occasionally joined bills as cosponsors
		}

		// enter zero cospons/prod when missing, but could be observed
		// (prod_diff_coany is included so it is treated like the other prod_diff measures)
		foreach y in cospon cosponBin coop coopBin coany coanyBin prod_diff_spon prod_diff_cospon prod_diff_coany pair_prod_spon pair_prod_cospon ///
			denom_cospon denom_coany num_bill_cospon num_bill_cospon_d num_bill_coany num_bill_coany_d {
			replace `y' = 0 if mi(`y')
			replace `y'Lag = 0 if mi(`y'Lag)
			replace `y'Lag = . if session_id<=115 // don't know past
			replace `y'Lead = 0 if mi(`y'Lead)
			replace `y'Lead = . if session_id>=148 // don't know future
		}

		// create inverse hyperbolic sine transformed outcomes (Dean (2018), Burbidge et al. (1988)) and normalized outcomes
		foreach y in cospon cosponLag cosponLead coop coopLag coopLead coany coanyLag coanyLead {
			g ihs_`y' = ln(`y' + sqrt(1 + `y'^2))
		}

		// normalized outcomes: count divided by the cosine-style denominator;
		// a zero denominator gives missing, which is then set to 0 whenever
		// the count itself is observed
		forv k = 1/3 {
			local t: word `k' of "" "Lag" "Lead"
			g cospon_norm`t' = cospon`t'/denom_cospon`t'
			g coop_norm`t' = coop`t'/denom_coany`t'
			g coany_norm`t' = coany`t'/denom_coany`t' 
			replace cospon_norm`t'=0 if cospon_norm`t'==. & cospon`t'!=. // but not clear if this is appropriate...
			replace coop_norm`t'=0 if coop_norm`t'==. & coop`t'!=. // but not clear if this is appropriate...
			replace coany_norm`t'=0 if coany_norm`t'==. & coany`t'!=. // but not clear if this is appropriate...			
		}

		// just keep co-sponsorship relevant variables, then can merge RHS variables onto this later
		isid session_id MP_id MP_id_d 
		keep session_id MP_id MP_id_d coop* cospon* coany* pair_prod* prod_diff* denom_* ihs_* num*

		// label vars
		la var prod_diff_spon "abs diff. in # bills sponsored between MP pair in current session"
		la var prod_diff_cospon "abs diff. in # bills co-sponsored between MP pair in current session"
		la var prod_diff_coany "abs diff. in # bills (co-)sponsored between MP pair in current session"

		la var denom_cospon "cosine similarity-style denominator for cospon=sqrt(lengthA)*sqrt(lengthB)"
		la var denom_coany "cosine similarity-style denominator for coany"

		la var pair_prod_spon "# bills sponsored by MP 1 + # bills sponsored by MP 2 in pair in current session"
		la var pair_prod_cospon "# bills co-sponsored by MP 1 + # bills co-sponsored by MP 2 in pair in current session"

		la var cospon "# bills MP pair both co-sponsored in current session"
		la var cosponBin "=1 if cospon>0"
		la var ihs_cospon "inverse hyperbolic sine transformation of cospon"
		la var cospon_norm "cospon/denom_cospon"

		la var coop "# bills MP pair had 1 sponsoring and 1 co-sponsoring in current session"
		la var coopBin "=1 if coop>0"
		la var ihs_coop "inverse hyperbolic sine transformation of coop"
		la var coop_norm "coop/denom_coany"
		
		la var coany "# bills MP pair overlap on, whether as sponsor or co-sponsor, in current session"
		la var coanyBin "=1 if coany>0"
		la var ihs_coany "inverse hyperbolic sine transformation of coany"
		la var coany_norm "coany/denom_coany"		

		// keep the finished dyad file under a name that encodes source and subset
		tempfile dyads_with_`shortname'`suf'
		save `dyads_with_`shortname'`suf''
	}
}
}


*********************************************************************
// Create right-hand-side variables, especially neighbor treatment
*********************************************************************

// Build the session x MP-pair dataset of right-hand-side variables
// (neighbor treatment indicators for every RI index, pair similarity
// measures and their interactions, fixed-effect groups, session flags)
// and keep it in the tempfile rhs.

// bring MP data
use "`seatingfolder'\seating_MP_frontback.dta", clear

// just keep basic vars
keep session_id MP_id MP_name seatno_init seat_initRI* gender party party_id ///
	age random strata coalition ?_MP_idRI* numNeighborRI* sessExper left_right
	
// create dyads: a copy with every variable suffixed _d (except the session
// key) is joined back on within session, keeping one row per pair.
// The variable list is taken directly from the dataset rather than routed
// through a string expression, which can truncate a list of this length on
// older Stata versions.
preserve
	foreach var of varlist _all {
		ren `var' `var'_d
	}
	ren session_id_d session_id
	tempfile holding
	save `holding'
restore
joinby session_id using `holding'
keep if MP_id < MP_id_d

// generate neighbor treatment variable:
// side neighbor (U/L), front-back neighbor (F/B), and the sum of the two
forv i=0/`runsRI' {
	g neighborRI`i' = (U_MP_idRI`i'==MP_id_d) | (L_MP_idRI`i'==MP_id_d)
	g neighborFBRI`i' = (F_MP_idRI`i'==MP_id_d) | (B_MP_idRI`i'==MP_id_d)
	g neighbor360RI`i' = neighborRI`i'+neighborFBRI`i'		
}

// generate similarity variables and interactions
foreach var of varlist gender coalition party {
	g same`var' = `var'==`var'_d
	
	foreach x in RI FBRI 360RI {
		forv i=0/`runsRI' {
			g neighbor`x'`i'Xsame`var' = neighbor`x'`i'*same`var'
			g neighbor`x'`i'Xdiff`var' = neighbor`x'`i'*(1-same`var')
		}
	}
}

// generate age-diff, sessExper-diff, left_right-diff interactions
foreach x in age sessExper left_right {

	g `x'Diff = abs(`x' - `x'_d)
	la var `x'Diff "abs(`x' - `x'_d)"
	
	forv i=0/`runsRI' {
		g neighborRI`i'X`x'Diff = neighborRI`i'*`x'Diff
	}
}

// generate experience similarity variables and interactions.
// Left missing when the experience difference is missing: a missing value
// compares as larger than any number, so without the guard such pairs would
// be coded diffexperience=1. This matches how partyClose/partyFar are built below.
g sameexperience = sessExperDiff<=5 if !mi(sessExperDiff)
g diffexperience = sessExperDiff>5 if !mi(sessExperDiff)
foreach x in RI FBRI 360RI {
	forv i=0/`runsRI' {
		g neighbor`x'`i'Xsameexperience = neighbor`x'`i'*sameexperience
		g neighbor`x'`i'Xdiffexperience = neighbor`x'`i'*(1-sameexperience)
	}
}

// generate ideological distance variables

* above vs. below-median, conditional on sameparty=0
g partyClose = left_rightDiff<2.8947 if !mi(left_rightDiff)
g partyFar = left_rightDiff>=2.8947 if !mi(left_rightDiff)
la var partyClose "=1 if left_rightDiff<2.8947 (median for diffparty pairs)"
la var partyFar "=1 if left_rightDiff>=2.8947 (median for diffparty pairs)"

* terciles, conditional on sameparty=0
g partyVClose = left_rightDiff<=2.3635 if !mi(left_rightDiff)
g partyMid = left_rightDiff>2.3635 & left_rightDiff<=3.3211 if !mi(left_rightDiff)
g partyVFar = left_rightDiff>3.3211 if !mi(left_rightDiff)
la var partyVClose "=1 if left_rightDiff<=2.3635 (lowest tercile for diffparty pairs)"
la var partyMid "=1 if left_rightDiff>2.3635 & <=3.3211 (middle tercile for diffparty pairs)"
la var partyVFar "=1 if left_rightDiff>3.3211 (higher tercile for diffparty pairs)"

* interactions
foreach var of varlist partyClose partyFar partyVClose partyMid partyVFar {
	forv i=0/`runsRI' {
		g neighborRI`i'X`var' = neighborRI`i'*`var'
	}
}

// generate corner var: pair counts as corner if either MP has exactly one neighbor
forv i=0/`runsRI' {
	g cornerRI`i' = numNeighborRI`i'==1 | numNeighborRI`i'_d==1
	g middleRI`i' = 1 - cornerRI`i'
	g neighborRI`i'XcornerRI`i' = neighborRI`i'*cornerRI`i'
	g neighborRI`i'XmiddleRI`i' = neighborRI`i'*middleRI`i'
}

// other important variables
g maxMP = max(MP_id,MP_id_d)
g minMP = min(MP_id,MP_id_d)
egen pairFE = group(minMP maxMP)
// pairStrata: 0 = both in strata 0, 1 = strata differ, 2 = both in strata 1
g pairStrata = 0 if strata == 0 & strata_d == 0
replace pairStrata = 1 if strata != strata_d
replace pairStrata = 2 if strata == 1 & strata_d == 1

// group vars
// NOTE(review): party_id/party_id_d enter in MP_id order, so the party pair
// (A,B) and (B,A) form separate groups - confirm this is intended.
egen sessionXpairStrataXpartyPair = group(session_id pairStrata party_id party_id_d)
egen sessionXpartyPair = group(session_id party_id party_id_d)

// speaker dummy variable
g speaker= random==3
g speaker_d= random_d==3
g speaker_in_pair = speaker==1 | speaker_d==1

// make special and short session dummies
g specialSession = inlist(session_id,119,124,129,134,137,142)
g shortSession = session_id==147

/* Note special/short sessions N bills:
119: 29 
124: 3
129: 0
134: 9
137: 56
142: 20
147: 39 
otherwise 131-247 bills per session
*/	

// first and last session indicators
g firstSession = inlist(session_id,115,119,120,124,125,129,130,134,135,137,138,142,143,146,148)
la var firstSession "special session or first proper session of term - should probably not use these sessions when lagged outcome"
g lastSession = inlist(session_id,118,123,128,133,136,141,145,147)
la var lastSession "last session of term - should probably not use these sessions when lead outcome"
g leadShortIncomplete = inlist(session_id,146,148)
la var leadShortIncomplete "next session is short or incomplete - should probably not use these sessions when lead outcome"

// drop unnecessary variables (neighbor MP ids, both own and _d copies)
drop U_* L_* F_* B_*

// save
tempfile rhs
save `rhs'


*********************************************************************
// Combine bill and resolutions sponsorship data
*********************************************************************

// Step 1 - make variable names unique across the dyad files before merging.
// New name = prefix + old name + suffix, where
//   prefix is "res_" / "rep_" for resolutions / reports and empty for bills,
//   suffix is the subset tag for the session-level subsets ("", _1st, _2nd)
//          and empty for the month-level subsets.
// Month-level bill files are left untouched. Subsets flagged as missing
// (no multi-sponsor items) have no file and are skipped.
// NOTE(review): the month-level files are processed here but are not merged
// or saved anywhere in the code visible below - confirm whether that is intended.
foreach h in "" "_1st" "_2nd" "_m1" "_m2" "_m3" "_m4" "_m5" "_m6" ///
	"_m7" "_m8" "_m9" "_m10" "_m11" "_m12" {

	local monthly = strpos("`h'","m")>0
	local post = cond(`monthly', "", "`h'")

	foreach x in cospon res rep {
		if ``x'`h'_miss' == 0 {
			local bills = "`x'"=="cospon"
			local pre = cond(`bills', "", "`x'_")

			use `dyads_with_`x'`h'', clear
			ds session_id MP_id MP_id_d, not
			if !(`monthly' & `bills') {
				foreach var of varlist `r(varlist)' {
					ren `var' `pre'`var'`post'
				}
			}
			save `dyads_with_`x'`h'', replace
		}
	}
}

// Step 2 - start from the all-bills file and merge on the other eight
// session-level files (bills/resolutions/reports x all/1st/2nd); every pair
// must be present in every file.
use `dyads_with_cospon', clear

foreach h in "" "_1st" "_2nd" {
	foreach x in cospon res rep {
		if "`x'`h'" != "cospon" { // the all-bills file is already in memory
			merge 1:1 session_id MP_id MP_id_d using `dyads_with_`x'`h''
			assert _merge==3
			drop _merge
		}
	}
}

// Step 3 - omnibus outcomes pooling bills, resolutions and reports, for each
// subset and for current / lagged / lead values.
foreach h in "" "_1st" "_2nd" {
	foreach t in "" "Lag" "Lead" {

		local s "`t'`h'"

		// denominators: sqrt(pooled count, MP 1) * sqrt(pooled count, MP 2)
		foreach y in cospon coany {
			g omni_denom_`y'`s' = ///
				sqrt(num_bill_`y'`s'+res_num_bill_`y'`s'+rep_num_bill_`y'`s')* ///
				sqrt(num_bill_`y'_d`s'+res_num_bill_`y'_d`s'+rep_num_bill_`y'_d`s')
		}

		// pooled link counts
		foreach y in cospon coop coany {
			g omni_`y'`s' = `y'`s' + res_`y'`s' + rep_`y'`s'
		}

		// pooled binary outcomes: 1 if any of the three sources has a link
		foreach y in cospon coop coany {
			egen omni_`y'Bin`s' = rowmax(`y'Bin`s' res_`y'Bin`s' rep_`y'Bin`s')
		}

		// inverse hyperbolic sine of the pooled counts
		foreach y in cospon coop coany {
			g omni_ihs_`y'`s' = ln(omni_`y'`s' + sqrt(1 + omni_`y'`s'^2))
		}

		// normalized pooled counts (cospon uses its own denominator,
		// coop and coany share the coany denominator)
		foreach y in cospon coop coany {
			local d = cond("`y'"=="cospon", "cospon", "coany")
			g omni_`y'_norm`s' = omni_`y'`s'/omni_denom_`d'`s'
		}

		// zero denominator: set the ratio to 0 when the count is observed
		foreach y in cospon coop coany {
			replace omni_`y'_norm`s' = 0 if omni_`y'_norm`s'==. & omni_`y'`s'!=.
		}
	}
}

// drop some vars don't need
drop res*num* rep*num* *num*1st *num*2nd *num*Lag *num*Lead

// Step 4 - attach the right-hand-side variables (same pairs required on both sides) and save
merge 1:1 session_id MP_id MP_id_d using `rhs'
assert _merge==3
drop _merge

save "`cosponfolderanaly'/dyads_with_cospon", replace

beep
